-------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /bbkinghome/assarsah/pkeval/ReplicationPackage/Public/Logs/20250929_PMO_merge_wide_b1-22.l
> og
  log type:  text
 opened on:  29 Sep 2025, 10:11:36

. 
. /*----------------------------------------------------*/
.             /* Section: Append batches */
. /*----------------------------------------------------*/
. 
. * load first batch
.         u "$KP_deid_admin/Raw/JPAL_DATA_1/cleanedbatch_1.dta", clear

.         gisid anon_id4

. 
. * merge in randomization weight
.         merge 1:1 anon_id4 using "$KP_deid_admin/Raw/JPAL_DATA_2/batch_1.dta", assert(3) nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         1,871,117  
    -----------------------------------------

.         assert !missing(bobot)

. 
.         gen batch = 1

. 
. * loop to append each batch
. * Batch 18-22 does not have code_domisili, instead province_code/name, city_code/name
. * Batch 18-22: final_test_score instead of test_score
. * Batch 18-22: ticket_score instead of bobot
.         qui forval i = 2 / 22 {
Appending Batch 2
Appending Batch 3
Appending Batch 4
Appending Batch 5
Appending Batch 6
Appending Batch 7
Appending Batch 8
Appending Batch 9
Appending Batch 10
Appending Batch 11
Appending Batch 12
Appending Batch 13
Appending Batch 14
Appending Batch 15
Appending Batch 16
Appending Batch 17
Appending Batch 18
Appending Batch 19
Appending Batch 20
Appending Batch 21
Appending Batch 22

. 
. * Check uniqueness
.         gisid anon_id4 batch

.         gsort anon_id4 batch

. 
. 
. /*----------------------------------------------------*/
.                     /* Section: Clean */
. /*----------------------------------------------------*/
. 
. * count number of appearances
.         tab batch, m

      batch |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |  1,871,117        3.20        3.20
          2 |  1,181,938        2.02        5.23
          3 |  1,262,894        2.16        7.39
          4 |    978,545        1.67        9.06
          5 |  1,126,203        1.93       10.99
          6 |  1,052,591        1.80       12.79
          7 |  1,835,499        3.14       15.93
          8 |  3,115,794        5.33       21.27
          9 |  3,023,877        5.18       26.44
         10 |  3,907,788        6.69       33.13
         11 |  3,575,984        6.12       39.25
         12 |  2,951,071        5.05       44.30
         13 |  4,461,661        7.64       51.94
         14 |  4,799,136        8.21       60.15
         15 |  5,159,381        8.83       68.98
         16 |  5,385,639        9.22       78.20
         17 |  1,463,865        2.51       80.70
         18 |  1,976,172        3.38       84.09
         19 |  2,346,775        4.02       88.10
         20 |  2,596,084        4.44       92.55
         21 |  2,076,678        3.55       96.10
         22 |  2,277,807        3.90      100.00
------------+-----------------------------------
      Total | 58,426,499      100.00

.         gsort anon_id4 batch

.         by anon_id4: gen appearances = _N

.         by anon_id4 (batch): gen last_appearance = _n == _N

.         tab appearances if last_appearance, m

appearances |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |  9,974,989       43.07       43.07
          2 |  4,800,350       20.73       63.80
          3 |  2,926,836       12.64       76.44
          4 |  2,057,812        8.89       85.33
          5 |  1,277,569        5.52       90.85
          6 |    833,739        3.60       94.45
          7 |    570,065        2.46       96.91
          8 |    341,661        1.48       98.38
          9 |    187,126        0.81       99.19
         10 |     97,008        0.42       99.61
         11 |     47,712        0.21       99.82
         12 |     23,176        0.10       99.92
         13 |     10,959        0.05       99.96
         14 |      4,833        0.02       99.98
         15 |      2,181        0.01       99.99
         16 |        963        0.00      100.00
         17 |        379        0.00      100.00
         18 |        136        0.00      100.00
         19 |         30        0.00      100.00
         20 |          7        0.00      100.00
         21 |          1        0.00      100.00
------------+-----------------------------------
      Total | 23,157,532      100.00

.         gstats summ appearances if last_appearance

                         appearances                         
-------------------------------------------------------------
      Percentiles      Smallest                              
  1%            1             1                              
  5%            1             1                              
 10%            1             1      Obs           23,157,532
 25%            1             1      Sum of Wgt.   23,157,532
                                                             
 50%            2                    Mean            2.523002
                        Largest      Std. Dev.        1.97642
 75%            3            20                              
 90%            5            20      Variance        3.906237
 95%            7            20      Skewness        1.685241
 99%            9            21      Kurtosis        6.124696

. 
. * check missings
.         count if mi(anon_hh_id)
  0

.         di `r(N)' / _N
0

.         *assert anon_id4 != "" // This one fails. Look into it.
.         assert batch != .

.         assert has_passed_current_batch != .

. 
. * check that once a user is accepted, they no longer apply
. * NOTE: all the problematic observations are in batches 1-3
.         gsort anon_id4 batch

.         by anon_id4 (batch): gen appearance_num = _n

.         gen select_batch_not_last = appearance_num != appearances if has_passed_current_batch == 1
(47,465,686 missing values generated)

.         tab select_batch_not_last

select_batc |
 h_not_last |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 | 10,960,767      100.00      100.00
          1 |         46        0.00      100.00
------------+-----------------------------------
      Total | 10,960,813      100.00

. 
.         gsort anon_id4 batch

.         by anon_id4: egen apply_after_selected = total(select_batch_not_last)

.         tab apply_after_selected

apply_after |
  _selected |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 | 58,426,391      100.00      100.00
          1 |        108        0.00      100.00
------------+-----------------------------------
      Total | 58,426,499      100.00

.         tab batch if apply_after_selected == 1

      batch |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |         32       29.63       29.63
          2 |         35       32.41       62.04
          3 |         41       37.96      100.00
------------+-----------------------------------
      Total |        108      100.00

. 
. * is this the first batch individual applies in?
.         gsort anon_id4

.         by anon_id4: gegen first_apply_batch = min(batch)
performance warning: -by- prefix may be slower than -by()-

.         assert first_apply_batch != .

.         by anon_id4 : gen ord = _n

.         gen first_apply = batch == first_apply_batch

.         tab first_apply_batch if ord == 1

first_apply |
     _batch |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |  1,871,117        8.08        8.08
          2 |    535,148        2.31       10.39
          3 |    417,607        1.80       12.19
          4 |    625,521        2.70       14.90
          5 |    808,142        3.49       18.39
          6 |    736,890        3.18       21.57
          7 |  1,421,611        6.14       27.71
          8 |  2,061,877        8.90       36.61
          9 |  1,461,422        6.31       42.92
         10 |  1,798,212        7.77       50.69
         11 |    949,823        4.10       54.79
         12 |  1,006,066        4.34       59.13
         13 |  2,175,892        9.40       68.53
         14 |  1,030,635        4.45       72.98
         15 |  1,060,914        4.58       77.56
         16 |    943,007        4.07       81.63
         17 |    257,712        1.11       82.74
         18 |    676,022        2.92       85.66
         19 |    669,530        2.89       88.55
         20 |  1,003,458        4.33       92.89
         21 |    695,213        3.00       95.89
         22 |    951,713        4.11      100.00
------------+-----------------------------------
      Total | 23,157,532      100.00

.         tab batch first_apply, row

+----------------+
| Key            |
|----------------|
|   frequency    |
| row percentage |
+----------------+

           |      first_apply
     batch |         0          1 |     Total
-----------+----------------------+----------
         1 |         0  1,871,117 | 1,871,117 
           |      0.00     100.00 |    100.00 
-----------+----------------------+----------
         2 |   646,790    535,148 | 1,181,938 
           |     54.72      45.28 |    100.00 
-----------+----------------------+----------
         3 |   845,287    417,607 | 1,262,894 
           |     66.93      33.07 |    100.00 
-----------+----------------------+----------
         4 |   353,024    625,521 |   978,545 
           |     36.08      63.92 |    100.00 
-----------+----------------------+----------
         5 |   318,061    808,142 | 1,126,203 
           |     28.24      71.76 |    100.00 
-----------+----------------------+----------
         6 |   315,701    736,890 | 1,052,591 
           |     29.99      70.01 |    100.00 
-----------+----------------------+----------
         7 |   413,888  1,421,611 | 1,835,499 
           |     22.55      77.45 |    100.00 
-----------+----------------------+----------
         8 | 1,053,917  2,061,877 | 3,115,794 
           |     33.82      66.18 |    100.00 
-----------+----------------------+----------
         9 | 1,562,455  1,461,422 | 3,023,877 
           |     51.67      48.33 |    100.00 
-----------+----------------------+----------
        10 | 2,109,576  1,798,212 | 3,907,788 
           |     53.98      46.02 |    100.00 
-----------+----------------------+----------
        11 | 2,626,161    949,823 | 3,575,984 
           |     73.44      26.56 |    100.00 
-----------+----------------------+----------
        12 | 1,945,005  1,006,066 | 2,951,071 
           |     65.91      34.09 |    100.00 
-----------+----------------------+----------
        13 | 2,285,769  2,175,892 | 4,461,661 
           |     51.23      48.77 |    100.00 
-----------+----------------------+----------
        14 | 3,768,501  1,030,635 | 4,799,136 
           |     78.52      21.48 |    100.00 
-----------+----------------------+----------
        15 | 4,098,467  1,060,914 | 5,159,381 
           |     79.44      20.56 |    100.00 
-----------+----------------------+----------
        16 | 4,442,632    943,007 | 5,385,639 
           |     82.49      17.51 |    100.00 
-----------+----------------------+----------
        17 | 1,206,153    257,712 | 1,463,865 
           |     82.40      17.60 |    100.00 
-----------+----------------------+----------
        18 | 1,300,150    676,022 | 1,976,172 
           |     65.79      34.21 |    100.00 
-----------+----------------------+----------
        19 | 1,677,245    669,530 | 2,346,775 
           |     71.47      28.53 |    100.00 
-----------+----------------------+----------
        20 | 1,592,626  1,003,458 | 2,596,084 
           |     61.35      38.65 |    100.00 
-----------+----------------------+----------
        21 | 1,381,465    695,213 | 2,076,678 
           |     66.52      33.48 |    100.00 
-----------+----------------------+----------
        22 | 1,326,094    951,713 | 2,277,807 
           |     58.22      41.78 |    100.00 
-----------+----------------------+----------
     Total |35,268,967 23,157,532 |58,426,499 
           |     60.36      39.64 |    100.00 

.         assert first_apply_batch != .

. 
. * check number of individuals applying from same HH
.         gsort anon_hh_id batch

.         by anon_hh_id batch: gen num_apply_in_batch_hh = _N if anon_hh_id != .

.         gegen unique_applicants_hh = tag(anon_hh_id anon_id4)

.         by anon_hh_id: gegen total_apply_hh = total(unique_applicants_hh)
performance warning: -by- prefix may be slower than -by()-

.         gen hh_applied = 0

.         replace hh_applied = 1 if num_apply_in_batch_hh > 1
(14,232,573 real changes made)

. 
. * does HH win in this batch?
.         gsort anon_hh_id batch

.         by anon_hh_id : replace ord = _n
(27680831 real changes made)

.         by anon_hh_id batch: gegen hh_wins_in_batch = total(has_passed_current_batch) if anon_hh_id !
> = .
performance warning: -by- prefix may be slower than -by()-

.         tab batch hh_wins_in_batch if ord == 1, row

+----------------+
| Key            |
|----------------|
|   frequency    |
| row percentage |
+----------------+

           |                               hh_wins_in_batch
     batch |         0          1          2          3          4          5          6 |     Total
-----------+-----------------------------------------------------------------------------+----------
         1 | 1,209,848    120,412      2,595         67          3          0          0 | 1,332,926 
           |     90.77       9.03       0.19       0.01       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         2 |   284,308     81,339      1,726         43          2          0          0 |   367,418 
           |     77.38      22.14       0.47       0.01       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         3 |   223,936     45,426        877         15          0          0          0 |   270,254 
           |     82.86      16.81       0.32       0.01       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         4 |   200,729    262,825     18,726        858         71          1          1 |   483,211 
           |     41.54      54.39       3.88       0.18       0.01       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         5 |   201,306    382,982     32,802      1,519        122         12          0 |   618,743 
           |     32.53      61.90       5.30       0.25       0.02       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         6 |   130,980    388,020     34,761      1,692        120         13          1 |   555,587 
           |     23.58      69.84       6.26       0.30       0.02       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         7 |   574,874    447,159     30,934      1,271         73          4          2 | 1,054,317 
           |     54.53      42.41       2.93       0.12       0.01       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         8 | 1,099,966    389,617     20,294        647         38          5          0 | 1,510,567 
           |     72.82      25.79       1.34       0.04       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
         9 |   751,275    298,527     15,491        577         27          4          1 | 1,065,902 
           |     70.48      28.01       1.45       0.05       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        10 | 1,176,399     33,763        572         26          9          3          0 | 1,210,772 
           |     97.16       2.79       0.05       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        11 |   617,583     52,003      1,690        147         27          3          0 |   671,454 
           |     91.98       7.74       0.25       0.02       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        12 |   527,276    131,274      5,283          0          0          0          0 |   663,833 
           |     79.43      19.78       0.80       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        13 | 1,226,658    207,092     10,732          0          0          0          0 | 1,444,482 
           |     84.92      14.34       0.74       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        14 |   578,021    102,081      7,829          0          0          0          0 |   687,931 
           |     84.02      14.84       1.14       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        15 |   664,127     35,254      7,359          0          0          0          0 |   706,740 
           |     93.97       4.99       1.04       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        16 |   589,007     39,062        444          0          0          0          0 |   628,513 
           |     93.71       6.21       0.07       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        17 |   181,234      4,981          8          0          0          0          0 |   186,223 
           |     97.32       2.67       0.00       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        18 |   383,077    258,571     18,155          0          0          0          0 |   659,803 
           |     58.06      39.19       2.75       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        19 |   255,041    152,777     10,521          0          0          0          0 |   418,339 
           |     60.97      36.52       2.51       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        20 |   269,467    145,501      9,249          0          0          0          0 |   424,217 
           |     63.52      34.30       2.18       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        21 |   164,032    115,515      5,737          0          0          0          0 |   285,284 
           |     57.50      40.49       2.01       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
        22 |   340,240      7,536         18          0          0          0          0 |   347,794 
           |     97.83       2.17       0.01       0.00       0.00       0.00       0.00 |    100.00 
-----------+-----------------------------------------------------------------------------+----------
     Total |11,649,384  3,701,717    235,803      6,862        492         45          5 |15,594,310 
           |     74.70      23.74       1.51       0.04       0.00       0.00       0.00 |    100.00 


           |   hh_wins_in_batch
     batch |         7        487 |     Total
-----------+----------------------+----------
         1 |         0          1 | 1,332,926 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         2 |         0          0 |   367,418 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         3 |         0          0 |   270,254 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         4 |         0          0 |   483,211 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         5 |         0          0 |   618,743 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         6 |         0          0 |   555,587 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         7 |         0          0 | 1,054,317 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         8 |         0          0 | 1,510,567 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
         9 |         0          0 | 1,065,902 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        10 |         0          0 | 1,210,772 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        11 |         1          0 |   671,454 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        12 |         0          0 |   663,833 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        13 |         0          0 | 1,444,482 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        14 |         0          0 |   687,931 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        15 |         0          0 |   706,740 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        16 |         0          0 |   628,513 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        17 |         0          0 |   186,223 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        18 |         0          0 |   659,803 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        19 |         0          0 |   418,339 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        20 |         0          0 |   424,217 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        21 |         0          0 |   285,284 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
        22 |         0          0 |   347,794 
           |      0.00       0.00 |    100.00 
-----------+----------------------+----------
     Total |         1          1 |15,594,310 
           |      0.00       0.00 |    100.00 

. 
.         preserve

.         gsort anon_hh_id batch

.         by anon_hh_id batch: replace ord = _n
(40458939 real changes made)

.         keep if ord == 1
(7,749,327 observations deleted)

.         tab batch hh_wins_in_batch

           |                               hh_wins_in_batch
     batch |         0          1          2          3          4          5          6 |     Total
-----------+-----------------------------------------------------------------------------+----------
         1 | 1,209,848    120,412      2,595         67          3          0          0 | 1,332,926 
         2 |   707,133    188,392      4,890        109          5          0          0 |   900,530 
         3 |   819,280    136,763      2,342         26          2          0          0 |   958,414 
         4 |   339,392    498,915     37,699      1,936        168          5          1 |   878,116 
         5 |   344,535    619,081     49,364      2,524        178         14          0 | 1,015,696 
         6 |   268,984    636,687     49,765      2,500        181         16          1 |   958,134 
         7 |   938,383    654,776     40,799      1,720         96          4          2 | 1,635,780 
         8 | 2,035,208    671,840     30,885        958         47          5          1 | 2,738,944 
         9 | 1,995,448    694,413     27,172        880         38          4          2 | 2,717,957 
        10 | 3,260,794     88,829        832         32          9          3          0 | 3,350,499 
        11 | 2,819,760    258,631      5,644        231         30          3          0 | 3,084,300 
        12 | 2,048,823    548,577     17,948          0          0          0          0 | 2,615,348 
        13 | 3,257,247    551,193     17,499          0          0          0          0 | 3,825,939 
        14 | 3,556,131    554,314     16,606          0          0          0          0 | 4,127,051 
        15 | 3,869,755    541,592     24,161          0          0          0          0 | 4,435,508 
        16 | 4,248,936    350,970      4,168          0          0          0          0 | 4,604,074 
        17 | 1,326,674     41,716         82          0          0          0          0 | 1,368,472 
        18 | 1,003,241    677,444     47,715          0          0          0          0 | 1,728,401 
        19 | 1,346,431    700,854     36,950          0          0          0          0 | 2,084,236 
        20 | 1,568,716    710,602     31,461          0          0          0          0 | 2,310,780 
        21 | 1,211,099    685,717     24,006          0          0          0          0 | 1,920,823 
        22 | 2,040,328     44,840         75          0          0          0          0 | 2,085,244 
-----------+-----------------------------------------------------------------------------+----------
     Total |40,216,146  9,976,558    472,658     10,983        757         54          7 |50,677,172 


           |                               hh_wins_in_batch
     batch |         7         32        206        247        291        343        446 |     Total
-----------+-----------------------------------------------------------------------------+----------
         1 |         0          0          0          0          0          0          0 | 1,332,926 
         2 |         0          0          0          0          0          0          0 |   900,530 
         3 |         0          0          0          0          0          0          1 |   958,414 
         4 |         0          0          0          0          0          0          0 |   878,116 
         5 |         0          0          0          0          0          0          0 | 1,015,696 
         6 |         0          0          0          0          0          0          0 |   958,134 
         7 |         0          0          0          0          0          0          0 | 1,635,780 
         8 |         0          0          0          0          0          0          0 | 2,738,944 
         9 |         0          0          0          0          0          0          0 | 2,717,957 
        10 |         0          0          0          0          0          0          0 | 3,350,499 
        11 |         1          0          0          0          0          0          0 | 3,084,300 
        12 |         0          0          0          0          0          0          0 | 2,615,348 
        13 |         0          0          0          0          0          0          0 | 3,825,939 
        14 |         0          0          0          0          0          0          0 | 4,127,051 
        15 |         0          0          0          0          0          0          0 | 4,435,508 
        16 |         0          0          0          0          0          0          0 | 4,604,074 
        17 |         0          0          0          0          0          0          0 | 1,368,472 
        18 |         0          0          1          0          0          0          0 | 1,728,401 
        19 |         0          0          0          1          0          0          0 | 2,084,236 
        20 |         0          0          0          0          1          0          0 | 2,310,780 
        21 |         0          0          0          0          0          1          0 | 1,920,823 
        22 |         0          1          0          0          0          0          0 | 2,085,244 
-----------+-----------------------------------------------------------------------------+----------
     Total |         1          1          1          1          1          1          1 |50,677,172 


           |   hh_wins_in_batch
     batch |       487        591 |     Total
-----------+----------------------+----------
         1 |         1          0 | 1,332,926 
         2 |         0          1 |   900,530 
         3 |         0          0 |   958,414 
         4 |         0          0 |   878,116 
         5 |         0          0 | 1,015,696 
         6 |         0          0 |   958,134 
         7 |         0          0 | 1,635,780 
         8 |         0          0 | 2,738,944 
         9 |         0          0 | 2,717,957 
        10 |         0          0 | 3,350,499 
        11 |         0          0 | 3,084,300 
        12 |         0          0 | 2,615,348 
        13 |         0          0 | 3,825,939 
        14 |         0          0 | 4,127,051 
        15 |         0          0 | 4,435,508 
        16 |         0          0 | 4,604,074 
        17 |         0          0 | 1,368,472 
        18 |         0          0 | 1,728,401 
        19 |         0          0 | 2,084,236 
        20 |         0          0 | 2,310,780 
        21 |         0          0 | 1,920,823 
        22 |         0          0 | 2,085,244 
-----------+----------------------+----------
     Total |         1          1 |50,677,172 

.         restore

. 
.         gen hh_win_in_batch = hh_wins_in_batch > 0 if !missing(hh_wins_in_batch)

.         tab batch hh_win_in_batch if ord == 1, row

+----------------+
| Key            |
|----------------|
|   frequency    |
| row percentage |
+----------------+

           |    hh_win_in_batch
     batch |         0          1 |     Total
-----------+----------------------+----------
         1 | 1,209,848    123,078 | 1,332,926 
           |     90.77       9.23 |    100.00 
-----------+----------------------+----------
         2 |   284,308     83,110 |   367,418 
           |     77.38      22.62 |    100.00 
-----------+----------------------+----------
         3 |   223,936     46,318 |   270,254 
           |     82.86      17.14 |    100.00 
-----------+----------------------+----------
         4 |   200,729    282,482 |   483,211 
           |     41.54      58.46 |    100.00 
-----------+----------------------+----------
         5 |   201,306    417,437 |   618,743 
           |     32.53      67.47 |    100.00 
-----------+----------------------+----------
         6 |   130,980    424,607 |   555,587 
           |     23.58      76.42 |    100.00 
-----------+----------------------+----------
         7 |   574,874    479,443 | 1,054,317 
           |     54.53      45.47 |    100.00 
-----------+----------------------+----------
         8 | 1,099,966    410,601 | 1,510,567 
           |     72.82      27.18 |    100.00 
-----------+----------------------+----------
         9 |   751,275    314,627 | 1,065,902 
           |     70.48      29.52 |    100.00 
-----------+----------------------+----------
        10 | 1,176,399     34,373 | 1,210,772 
           |     97.16       2.84 |    100.00 
-----------+----------------------+----------
        11 |   617,583     53,871 |   671,454 
           |     91.98       8.02 |    100.00 
-----------+----------------------+----------
        12 |   527,276    136,557 |   663,833 
           |     79.43      20.57 |    100.00 
-----------+----------------------+----------
        13 | 1,226,658    217,824 | 1,444,482 
           |     84.92      15.08 |    100.00 
-----------+----------------------+----------
        14 |   578,021    109,910 |   687,931 
           |     84.02      15.98 |    100.00 
-----------+----------------------+----------
        15 |   664,127     42,613 |   706,740 
           |     93.97       6.03 |    100.00 
-----------+----------------------+----------
        16 |   589,007     39,506 |   628,513 
           |     93.71       6.29 |    100.00 
-----------+----------------------+----------
        17 |   181,234      4,989 |   186,223 
           |     97.32       2.68 |    100.00 
-----------+----------------------+----------
        18 |   383,077    276,726 |   659,803 
           |     58.06      41.94 |    100.00 
-----------+----------------------+----------
        19 |   255,041    163,298 |   418,339 
           |     60.97      39.03 |    100.00 
-----------+----------------------+----------
        20 |   269,467    154,750 |   424,217 
           |     63.52      36.48 |    100.00 
-----------+----------------------+----------
        21 |   164,032    121,252 |   285,284 
           |     57.50      42.50 |    100.00 
-----------+----------------------+----------
        22 |   340,240      7,554 |   347,794 
           |     97.83       2.17 |    100.00 
-----------+----------------------+----------
     Total |11,649,384  3,944,926 |15,594,310 
           |     74.70      25.30 |    100.00 

. 
. /*----------------------------------------------------*/
.                     /* Section: Reshape */
. /*----------------------------------------------------*/
. 
. * drop users that apply after being selected
. // NOTE: may change handling of this later
.         drop if apply_after_selected == 1
(108 observations deleted)

.         gsort anon_id4

. 
. * Prep to reshape wide
.         gen applied = 1

.         rename aaa* aaa*_

.         compress
  variable education was long now byte
  variable age was float now int
  variable anon_month_dob was float now int
  variable bobot was float now byte
  variable batch was float now byte
  variable appearances was float now byte
  variable last_appearance was float now byte
  variable appearance_num was float now byte
  variable select_batch_not_last was float now byte
  variable apply_after_selected was float now byte
  variable first_apply_batch was float now byte
  variable first_apply was float now byte
  variable unique_applicants_hh was float now byte
  variable hh_applied was float now byte
  variable hh_win_in_batch was float now byte
  variable applied was float now byte
  variable anon_id4 was double now long
  variable total_apply_hh was double now long
  variable hh_wins_in_batch was double now int
  (3,505,583,460 bytes saved)

. 
. * Reshape wide
.         local varlist applied ///
>              anon_hh_id ///
>              has_passed_current_batch ///
>              first_apply_batch ///
>              bobot ///
>              year_dob ///
>              anon_month_dob ///
>              gender ///
>              education ///
>              test_score ///
>                          hh_applied ///
>                          hh_win_in_batch ///
>                          aaa*_

. 
.         keep `varlist' anon_id4 batch

. 
.         greshape wide `varlist ', ///
>                 i(anon_id4) j(batch) nochecks benchmark
(note: reshape left unsorted; original order not preserved)
(note: j = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22)
Writing reshape to disk:
Parsed by variables; .041 seconds
C plugin runtime; 147.9 seconds
Internal gtools runtime; 148.1 seconds

Reading reshape from disk:
Parsed by variables; .006 seconds
C plugin runtime; 165 seconds
Internal gtools runtime; 165.1 seconds

Data                               long   ->   wide
-----------------------------------------------------------------------------
Number of obs.               58,426,391   ->   23,157,486           
Number of variables                   39  ->   815                  
j (22 values)                     batch   ->   (dropped)
xij variables:
                                applied   ->   applied1 applied2 ... applied22
                             anon_hh_id   ->   anon_hh_id1 anon_hh_id2 ... anon_hh_id22
               has_passed_current_batch   ->   has_passed_current_batch1 has_passed_current_batch2 ... 
> has_passed_current_batch22
                      first_apply_batch   ->   first_apply_batch1 first_apply_batch2 ... first_apply_ba
> tch22
                                  bobot   ->   bobot1 bobot2 ... bobot22
                               year_dob   ->   year_dob1 year_dob2 ... year_dob22
                         anon_month_dob   ->   anon_month_dob1 anon_month_dob2 ... anon_month_dob22
                                 gender   ->   gender1 gender2 ... gender22
                              education   ->   education1 education2 ... education22
                             test_score   ->   test_score1 test_score2 ... test_score22
                             hh_applied   ->   hh_applied1 hh_applied2 ... hh_applied22
                        hh_win_in_batch   ->   hh_win_in_batch1 hh_win_in_batch2 ... hh_win_in_batch22
                                  aaa1_   ->   aaa1_1 aaa1_2 ... aaa1_22
                                  aaa2_   ->   aaa2_1 aaa2_2 ... aaa2_22
                                  aaa3_   ->   aaa3_1 aaa3_2 ... aaa3_22
                                  aaa4_   ->   aaa4_1 aaa4_2 ... aaa4_22
                                  aaa5_   ->   aaa5_1 aaa5_2 ... aaa5_22
                                  aaa6_   ->   aaa6_1 aaa6_2 ... aaa6_22
                                  aaa7_   ->   aaa7_1 aaa7_2 ... aaa7_22
                                  aaa8_   ->   aaa8_1 aaa8_2 ... aaa8_22
                                  aaa9_   ->   aaa9_1 aaa9_2 ... aaa9_22
                                 aaa20_   ->   aaa20_1 aaa20_2 ... aaa20_22
                                 aaa21_   ->   aaa21_1 aaa21_2 ... aaa21_22
                                 aaa22_   ->   aaa22_1 aaa22_2 ... aaa22_22
                                 aaa23_   ->   aaa23_1 aaa23_2 ... aaa23_22
                                 aaa24_   ->   aaa24_1 aaa24_2 ... aaa24_22
                                 aaa25_   ->   aaa25_1 aaa25_2 ... aaa25_22
                                 aaa26_   ->   aaa26_1 aaa26_2 ... aaa26_22
                                 aaa27_   ->   aaa27_1 aaa27_2 ... aaa27_22
                                 aaa31_   ->   aaa31_1 aaa31_2 ... aaa31_22
                                 aaa32_   ->   aaa32_1 aaa32_2 ... aaa32_22
                                 aaa33_   ->   aaa33_1 aaa33_2 ... aaa33_22
                                 aaa34_   ->   aaa34_1 aaa34_2 ... aaa34_22
                                 aaa35_   ->   aaa35_1 aaa35_2 ... aaa35_22
                                 aaa36_   ->   aaa36_1 aaa36_2 ... aaa36_22
                                 aaa37_   ->   aaa37_1 aaa37_2 ... aaa37_22
                                 aaa38_   ->   aaa38_1 aaa38_2 ... aaa38_22
-----------------------------------------------------------------------------

. 
. * Clean up data
. * keep only first copy of duplicated variables
.         foreach var in first_apply_batch year_dob anon_month_dob gender ///
>                 test_score education aaa1_ aaa2_ aaa3_ aaa4_ aaa5_ aaa6_ aaa7_ ///
>                 aaa8_ aaa9_ aaa20_ aaa21_ aaa22_ aaa23_ aaa24_ aaa25_ aaa26_ aaa27_ ///
>                 aaa31_ aaa32_ aaa33_ aaa34_ aaa35_ aaa36_ aaa37_ aaa38_ {
  2.                   egen `var' = rowfirst(`var'*)
  3.                   drop `var'?*
  4.         }
(13,183 missing values generated)
(34,477 missing values generated)
(13,183 missing values generated)
(10,857,690 missing values generated)
(21,476,431 missing values generated)
(12,538,523 missing values generated)
(15,948,181 missing values generated)
(22,223,866 missing values generated)
(22,409,902 missing values generated)
(22,899,909 missing values generated)
(23,155,568 missing values generated)
(23,156,404 missing values generated)
(13,625,935 missing values generated)
(22,127,505 missing values generated)
(14,643,809 missing values generated)
(22,861,799 missing values generated)
(22,420,552 missing values generated)
(16,534,910 missing values generated)
(23,034,252 missing values generated)
(22,420,552 missing values generated)
(16,458,762 missing values generated)
(22,553,991 missing values generated)
(17,062,211 missing values generated)
(22,989,489 missing values generated)
(22,721,942 missing values generated)
(18,279,316 missing values generated)
(23,082,364 missing values generated)
(22,721,942 missing values generated)

. 
.         gen anon_hh_id = .
(23,157,486 missing values generated)

.         gen win_batch = .
(23,157,486 missing values generated)

. 
.         qui forval i = 1 / 22 {

. 
.         rename aaa*_ aaa*

.         drop anon_hh_id?*

. 
. * Save raw merged reshaped data
. datasignature 
  23157486:144(24674):2193512926:3097620854

. if "`r(datasignature)'" == "23157486:144(24674):2193512926:3097620854" {
.    save "$KP_deid_admin/Clean/pmo_b1-22_raw_wide.dta", replace
(file /proj/pbolken/pkrepkitdata/Data/Admin/Clean/pmo_b1-22_raw_wide.dta not found)
file /proj/pbolken/pkrepkitdata/Data/Admin/Clean/pmo_b1-22_raw_wide.dta saved
.       }

. else {
.    di as err "Careful, your machine produces a different dataset"
.    stop
.                 }

.         
. 
.         cap log close
